@aiello/wechat-to-markdown 1.2.10 → 1.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1,41 +1,39 @@
1
+ "use strict";
1
2
  var __create = Object.create;
2
3
  var __defProp = Object.defineProperty;
3
4
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
5
  var __getOwnPropNames = Object.getOwnPropertyNames;
5
6
  var __getProtoOf = Object.getPrototypeOf;
6
7
  var __hasOwnProp = Object.prototype.hasOwnProperty;
7
- var __markAsModule = (target) => __defProp(target, "__esModule", { value: true });
8
8
  var __export = (target, all) => {
9
- __markAsModule(target);
10
9
  for (var name in all)
11
10
  __defProp(target, name, { get: all[name], enumerable: true });
12
11
  };
13
- var __reExport = (target, module2, desc) => {
14
- if (module2 && typeof module2 === "object" || typeof module2 === "function") {
15
- for (let key of __getOwnPropNames(module2))
16
- if (!__hasOwnProp.call(target, key) && key !== "default")
17
- __defProp(target, key, { get: () => module2[key], enumerable: !(desc = __getOwnPropDesc(module2, key)) || desc.enumerable });
12
+ var __copyProps = (to, from, except, desc) => {
13
+ if (from && typeof from === "object" || typeof from === "function") {
14
+ for (let key of __getOwnPropNames(from))
15
+ if (!__hasOwnProp.call(to, key) && key !== except)
16
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
18
17
  }
19
- return target;
20
- };
21
- var __toModule = (module2) => {
22
- return __reExport(__markAsModule(__defProp(module2 != null ? __create(__getProtoOf(module2)) : {}, "default", module2 && module2.__esModule && "default" in module2 ? { get: () => module2.default, enumerable: true } : { value: module2, enumerable: true })), module2);
18
+ return to;
23
19
  };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
22
+ mod
23
+ ));
24
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
24
25
 
25
26
  // src/index.ts
26
- __export(exports, {
27
+ var src_exports = {};
28
+ __export(src_exports, {
27
29
  Status: () => Status,
28
30
  default: () => transformHtml2Markdown,
29
31
  getTurnDownService: () => getTurnDownService,
30
32
  parseHTML: () => parseHTML
31
33
  });
32
-
33
- // node_modules/tsup/assets/cjs_shims.js
34
- var importMetaUrlShim = typeof document === "undefined" ? new (require("url")).URL("file:" + __filename).href : document.currentScript && document.currentScript.src || new URL("main.js", document.baseURI).href;
35
-
36
- // src/index.ts
37
- var import_axios = __toModule(require("axios"));
38
- var import_cheerio2 = __toModule(require("cheerio"));
34
+ module.exports = __toCommonJS(src_exports);
35
+ var import_axios = __toESM(require("axios"), 1);
36
+ var import_cheerio2 = require("cheerio");
39
37
 
40
38
  // src/error.ts
41
39
  var errObj = {
@@ -43,18 +41,18 @@ var errObj = {
43
41
  };
44
42
 
45
43
  // src/type.ts
46
- var Status;
47
- (function(Status2) {
44
+ var Status = /* @__PURE__ */ ((Status2) => {
48
45
  Status2[Status2["Success"] = 200] = "Success";
49
46
  Status2[Status2["Fail"] = 400] = "Fail";
50
- })(Status || (Status = {}));
47
+ return Status2;
48
+ })(Status || {});
51
49
 
52
50
  // src/turndownCode.ts
53
- var import_turndown = __toModule(require("turndown"));
54
- var import_turndown_plugin_gfm = __toModule(require("@guyplusplus/turndown-plugin-gfm"));
51
+ var import_turndown = __toESM(require("turndown"), 1);
52
+ var import_turndown_plugin_gfm = __toESM(require("@guyplusplus/turndown-plugin-gfm"), 1);
55
53
 
56
54
  // src/formatHtml.ts
57
- var import_cheerio = __toModule(require("cheerio"));
55
+ var import_cheerio = __toESM(require("cheerio"), 1);
58
56
  function formatCode(htmlStr) {
59
57
  let code = htmlStr;
60
58
  code = code.replace(/<br>/gi, "\n");
@@ -128,7 +126,9 @@ function getTurnDownService(params) {
128
126
  },
129
127
  replacement(content, _node) {
130
128
  const node = _node;
131
- const cover = decodeURIComponent(node.getAttribute("data-cover") || "");
129
+ const cover = decodeURIComponent(
130
+ node.getAttribute("data-cover") || ""
131
+ );
132
132
  const u = new URL(params.url);
133
133
  u.hash = `js_mp_video_container_${videoCounter++}`;
134
134
  return cover ? `
@@ -163,10 +163,14 @@ async function parseHTML(htmlRaw, meta) {
163
163
  const $ = (0, import_cheerio2.load)(htmlRaw);
164
164
  let title = $("#activity-name").text();
165
165
  title = title.trim() || "";
166
- const author = Array.from(new Set([
167
- (_a = $('meta[name="author"]')) == null ? void 0 : _a.attr("content"),
168
- ...$("#js_name").text().split("\n")
169
- ].map((item) => item ? item.trim() : "").filter(Boolean))).join("\n");
166
+ const author = Array.from(
167
+ new Set(
168
+ [
169
+ (_a = $('meta[name="author"]')) == null ? void 0 : _a.attr("content"),
170
+ ...$("#js_name").text().split("\n")
171
+ ].map((item) => item ? item.trim() : "").filter(Boolean)
172
+ )
173
+ ).join("\n");
170
174
  const htmlEl = $("#js_content");
171
175
  const html = htmlEl.html();
172
176
  if (html && html.length > 0) {
@@ -178,7 +182,7 @@ async function parseHTML(htmlRaw, meta) {
178
182
  ` + res;
179
183
  return {
180
184
  success: true,
181
- code: Status.Success,
185
+ code: 200 /* Success */,
182
186
  data: {
183
187
  title,
184
188
  author,
@@ -186,9 +190,11 @@ async function parseHTML(htmlRaw, meta) {
186
190
  }
187
191
  };
188
192
  }
189
- return getError(Status.Fail);
193
+ return getError(400 /* Fail */);
190
194
  }
191
- async function transformHtml2Markdown(url) {
195
+ async function transformHtml2Markdown(url, options = {}) {
196
+ const { axiosConfig = {} } = options;
197
+ const { headers = {}, ...restConfig } = axiosConfig;
192
198
  const u = new URL(url);
193
199
  u.searchParams.delete("poc_token");
194
200
  try {
@@ -198,13 +204,15 @@ async function transformHtml2Markdown(url) {
198
204
  headers: {
199
205
  DNT: "1",
200
206
  "Upgrade-Insecure-Requests": "1",
201
- "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36"
202
- }
207
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
208
+ ...headers
209
+ },
210
+ ...restConfig
203
211
  });
204
212
  return parseHTML(res.data, { url: u.href });
205
213
  } catch (err) {
206
214
  console.log(err);
207
- return getError(Status.Fail);
215
+ return getError(400 /* Fail */);
208
216
  }
209
217
  }
210
218
  // Annotate the CommonJS export names for ESM import in node:
@@ -213,4 +221,4 @@ async function transformHtml2Markdown(url) {
213
221
  getTurnDownService,
214
222
  parseHTML
215
223
  });
216
- //# sourceMappingURL=index.cjs.map
224
+ //# sourceMappingURL=index.cjs.map
@@ -1,7 +1 @@
1
- {
2
- "version": 3,
3
- "sources": ["../src/index.ts", "../node_modules/tsup/assets/cjs_shims.js", "../src/error.ts", "../src/type.ts", "../src/turndownCode.ts", "../src/formatHtml.ts"],
4
- "sourcesContent": ["import axios from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport type { TurnDownResult } from './type'\nimport { Status } from './type'\nimport { getTurnDownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport async function parseHTML(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html && html.length > 0) {\n let res = getTurnDownService(meta).turndown(html)\n\n res = `## ${title} \\n \\n` + `## \u4F5C\u8005 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n}\n\nexport default async function transformHtml2Markdown(\n url: string\n): Promise<TurnDownResult> {\n const u = new URL(url)\n // \u79FB\u9664\u8BE5\u53C2\u6570\n // \u907F\u514D\u51FA\u73B0 302 \u8DF3\u8F6C\n u.searchParams.delete('poc_token')\n\n try {\n const res = await axios.get(u.href, {\n timeout: 30000,\n maxRedirects: 5,\n headers: {\n DNT: '1',\n 'Upgrade-Insecure-Requests': '1',\n 'User-Agent':\n 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',\n },\n })\n\n return parseHTML(res.data, { url: u.href })\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\nexport { getTurnDownService } from './turndownCode'\n", "export const importMetaUrlShim =\n typeof document === 'undefined'\n ? new (require('u' + 'rl').URL)('file:' + __filename).href\n : (document.currentScript && document.currentScript.src) ||\n new URL('main.js', document.baseURI).href\n", "export const errObj: {\n [key: number]: string\n} = {\n '400': '\u5185\u5BB9\u89E3\u6790\u5931\u8D25',\n}\n", "export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n", "/**\n * html \u8F6C\u6362 markdown \u683C\u5F0F\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // \u81EA\u5B9A\u4E49\u914D\u7F6E\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // \u5FAE\u4FE1\u6587\u7AE0\u83B7\u53D6\u5230\u7684 content\uFF0C \u4F1A\u51FA\u73B0\u9996\u5C3E\u90FD\u6709 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n", "import cheerio from 'cheerio'\n\n/**\n * \u5FAE\u4FE1\u4E0D\u540C\u4EE3\u7801\u98CE\u683C\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown \u4E0D\u89E3\u6790 code \u4E0B\u7684 br \u6807\u7B7E\uFF0C\u9700\u8981\u4F7F\u7528\u6B63\u5219\u66FF\u6362 br \u6807\u7B7E\u4E3A \\n \u624D\u53EF\u4EE5\u7EE7\u7EED\u89E3\u6790\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '\u2018')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * \u89E3\u51B3\u5982\u4E0B\u683C\u5F0F\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img \u53EF\u80FD\u6CA1\u6709\u56FE\u7247\u8BF4\u660E\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],
5
- "mappings": ";;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAO,IAAM,oBACX,OAAO,aAAa,cAChB,IAAK,SAAQ,QAAY,IAAK,UAAU,YAAY,OACnD,SAAS,iBAAiB,SAAS,cAAc,OAClD,IAAI,IAAI,WAAW,SAAS,SAAS;;;ADJ3C,mBAAkB;AAClB,sBAAqB;;;AEDd,IAAM,SAET;AAAA,EACA,OAAO;AAAA;;;ACQJ,IAAW;AAAX,UAAW,SAAX;AACH,+BAAU,OAAV;AACA,4BAAO,OAAP;AAAA,GAFc;;;ACRlB,sBAA4B;AAC5B,iCAA8B;;;ACJ9B,qBAAoB;AAWb,oBAAoB,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,WAAW;AAE/B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,aAAa;AAEjC,SAAO,KAAK,QAAQ,cAAc;AAElC,QAAM,IAAI,uBAAQ,KAAK;AAEvB,SAAO,EAAE;AAAA;AASN,yBAAyB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM;AAEhC,QAAM,UAAU,WAAW,MAAM;AAEjC,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,SAAS;AACvB,aAAS,OAAO;AAAA;AAGpB,MAAI,MAAM,QAAQ,UAAU;AACxB,WAAO,QAAQ;AAAA;AAInB,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA;AAG9B;AAAA;;;AD1DJ,4BAA4B,QAAgB;AACxC,QAAM,kBAAkB,IAAI,wBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA;AAGR,qCAAkB,IAAI;AAEtB,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK;AAAA;AAGnC,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA;AAAA,KAG9B,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,eAAe;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA;AAAA,KAG7C,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,kBAAkB,YAC/B,KAAK,UAAU,SAAS;AAAA;AAAA,IAGhC,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ,mBACV,KAAK,aAAa,iBAAiB;AAGvC,YAAM,IAAI,IAAI,IAAI,OAAO;AACzB,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA;AAAA,KAG9D,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,KAEtB,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK;AACjC,aAAO,OAAO;AAAA;AAAA;AAI1B,SAAO;AAAA;;;AJ1EX,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA;AAAA;AAMpB,yBAAgC,SAAiB,MAAuB;AAjBxE;AAkBI,QAAM,IAAI,0BAAK;AAEf,MAAI,QAAQ,EAAE,kBAAkB;AAEhC,UAAQ,MAAM,UAAU;AACxB,QAAM,SAAS,MAAM,KACjB,IAAI,IACA;AAAA,IACI,QAAE,2BAAF,mBAA0B,KAAK;AAAA,IAC/B,GAAG,EAAE,YAAY,OAAO,MAAM;AAAA,IAE7B,IAAI,CAAC,SAAU,OAAO,KAAK,SAAS,IACpC,OAAO,WAElB,KAAK;AAEP,QAAM,SAAS,EAAE;AACjB,QAAM,OAAO,OAAO;AAEpB,MAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,QAAI,MAAM,mBAAmB,MAAM,SAAS;AAE5C,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT,MAAM,OAAO;AAAA,MACb,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA;AAAA;AAAA;AAKrB,SAAO,SAAS,OAAO;AAAA;AAG3B,sCACI,KACuB;AACvB,QAAM,IAAI,IAAI,IAAI;AAGlB,IAAE,aAAa,OAAO;AAEtB,MAAI;AACA,UAAM,MAAM,MAAM,qBAAM,IAAI,EAAE,MAAM;AAAA,MAChC,SAAS;AAAA,MACT,cAAc;AAAA,MACd,SAAS;AAAA,QACL,KAAK;AAAA,QACL,6BAA6B;AAAA,QAC7B,cACI;AAAA;AAAA;AAIZ,WAAO,UAAU,IAAI,MAAM,EAAE,KAAK,EAAE;AAAA,WAC/B,KAAP;AACE,YAAQ,IAAI;AACZ,WAAO,SAAS,OAAO;AAAA;AAAA;",
6
- "names": []
7
- }
1
+ {"version":3,"sources":["../src/index.ts","../src/error.ts","../src/type.ts","../src/turndownCode.ts","../src/formatHtml.ts"],"sourcesContent":["import axios, { AxiosRequestConfig } from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport type { TurnDownResult } from './type'\nimport { Status } from './type'\nimport { getTurnDownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport async function parseHTML(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html && html.length > 0) {\n let res = getTurnDownService(meta).turndown(html)\n\n res = `## ${title} \\n \\n` + `## 作者 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n}\n\n/**\n * 支持添加代理服务器\n */\ninterface TransformHtml2MarkdownOptions {\n axiosConfig?: AxiosRequestConfig\n}\n\nexport default async function transformHtml2Markdown(\n url: string,\n options: TransformHtml2MarkdownOptions = {}\n): Promise<TurnDownResult> {\n const { axiosConfig = {} } = options\n const { headers = {}, ...restConfig } = axiosConfig\n\n const u = new URL(url)\n // 移除该参数\n // 避免出现 302 跳转\n u.searchParams.delete('poc_token')\n\n try {\n const res = await axios.get(u.href, {\n timeout: 30000,\n maxRedirects: 5,\n headers: {\n DNT: '1',\n 'Upgrade-Insecure-Requests': '1',\n 'User-Agent':\n 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',\n ...headers,\n },\n ...restConfig,\n })\n\n return parseHTML(res.data, { url: u.href })\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\nexport { getTurnDownService } from './turndownCode'\n","export const errObj: {\n [key: number]: string\n} = {\n '400': '内容解析失败',\n}\n","export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n","/**\n * html 转换 markdown 格式\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // 自定义配置\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // 微信文章获取到的 content, 会出现首尾都有 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n","import cheerio from 'cheerio'\n\n/**\n * 微信不同代码风格\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown 不解析 code 下的 br 标签,需要使用正则替换 br 标签为 \\n 才可以继续解析\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '‘')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * 解决如下格式\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img 可能没有图片说明\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,mBAA0C;AAC1C,IAAAA,kBAAqB;;;ACDd,IAAM,SAET;AAAA,EACA,OAAO;AACX;;;ACOO,IAAW,SAAX,kBAAWC,YAAX;AACH,EAAAA,gBAAA,aAAU,OAAV;AACA,EAAAA,gBAAA,UAAO,OAAP;AAFc,SAAAA;AAAA,GAAA;;;ACRlB,sBAA4B;AAC5B,iCAA8B;;;ACJ9B,qBAAoB;AAWb,SAAS,WAAW,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU,IAAI;AAElC,SAAO,KAAK,QAAQ,YAAY,GAAG;AAEnC,SAAO,KAAK,QAAQ,UAAU,GAAG;AAEjC,SAAO,KAAK,QAAQ,UAAU,GAAG;AAEjC,SAAO,KAAK,QAAQ,WAAW,GAAG;AAElC,SAAO,KAAK,QAAQ,YAAY,GAAG;AAEnC,SAAO,KAAK,QAAQ,YAAY,QAAG;AAEnC,SAAO,KAAK,QAAQ,aAAa,GAAG;AAEpC,SAAO,KAAK,QAAQ,cAAc,GAAG;AAErC,QAAM,IAAI,eAAAC,QAAQ,KAAK,IAAI;AAE3B,SAAO,EAAE,KAAK;AAClB;AAQO,SAAS,gBAAgB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM,QAAQ;AAExC,QAAM,UAAU,WAAW,MAAM,SAAS;AAE1C,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,MAAM,GAAG;AACvB,aAAS,OAAO;AAAA,EACpB;AAEA,MAAI,MAAM,QAAQ,OAAO,GAAG;AACxB,WAAO,QAAQ;AAAA,EACnB;AAGA,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA,EAC9B;AAEA;AACJ;;;AD3DA,SAAS,mBAAmB,QAAgB;AACxC,QAAM,kBAAkB,IAAI,gBAAAC,QAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA,EACR,CAAC;AAED,6BAAAC,QAAkB,IAAI,eAAe;AAErC,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,KAAK;AAAA,IACd,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK,SAAS;AAAA,MAC5C;AAEA,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA,IAC3B;AAAA,EACJ,CAAC,EACA,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,KAAK;AAAA,IACd,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,UAAU,KAAK;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA,IAC1C;AAAA,EACJ,CAAC,EACA,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,YAAY,MAAM,YAC/B,KAAK,UAAU,SAAS,cAAc;AAAA,IAE9C;AAAA,IACA,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ;AAAA,QACV,KAAK,aAAa,YAAY,KAAK;AAAA,MACvC;AAEA,YAAM,IAAI,IAAI,IAAI,OAAO,GAAG;AAC5B,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA,IAC3D;AAAA,EACJ,CAAC,EACA,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,EACvB,CAAC,EACA,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,QAAQ;AAAA,IACjB,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK,SAAS;AAC1C,aAAO,OAAO;AAAA,IAClB;AAAA,EACJ,CAAC;AAEL,SAAO;AACX;;;AH3EA,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA,EAChB;AACJ;AAIA,eAAsB,UAAU,SAAiB,MAAuB;AAjBxE;AAkBI,QAAM,QAAI,sBAAK,OAAO;AAEtB,MAAI,QAAQ,EAAE,gBAAgB,EAAE,KAAK;AAErC,UAAQ,MAAM,KAAK,KAAK;AACxB,QAAM,SAAS,MAAM;AAAA,IACjB,IAAI;AAAA,MACA;AAAA,SACI,OAAE,qBAAqB,MAAvB,mBAA0B,KAAK;AAAA,QAC/B,GAAG,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,IAAI;AAAA,MACtC,EACK,IAAI,CAAC,SAAU,OAAO,KAAK,KAAK,IAAI,EAAG,EACvC,OAAO,OAAO;AAAA,IACvB;AAAA,EACJ,EAAE,KAAK,IAAI;AAEX,QAAM,SAAS,EAAE,aAAa;AAC9B,QAAM,OAAO,OAAO,KAAK;AAEzB,MAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,QAAI,MAAM,mBAAmB,IAAI,EAAE,SAAS,IAAI;AAEhD,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT;AAAA,MACA,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACb;AAAA,IACJ;AAAA,EACJ;AAEA,SAAO,uBAAoB;AAC/B;AASA,eAAO,uBACH,KACA,UAAyC,CAAC,GACnB;AACvB,QAAM,EAAE,cAAc,CAAC,EAAE,IAAI;AAC7B,QAAM,EAAE,UAAU,CAAC,MAAM,WAAW,IAAI;AAExC,QAAM,IAAI,IAAI,IAAI,GAAG;AAGrB,IAAE,aAAa,OAAO,WAAW;AAEjC,MAAI;AACA,UAAM,MAAM,MAAM,aAAAC,QAAM,IAAI,EAAE,MAAM;AAAA,MAChC,SAAS;AAAA,MACT,cAAc;AAAA,MACd,SAAS;AAAA,QACL,KAAK;AAAA,QACL,6BAA6B;AAAA,QAC7B,cACI;AAAA,QACJ,GAAG;AAAA,MACP;AAAA,MACA,GAAG;AAAA,IACP,CAAC;AAED,WAAO,UAAU,IAAI,MAAM,EAAE,KAAK,EAAE,KAAK,CAAC;AAAA,EAC9C,SAAS,KAAP;AACE,YAAQ,IAAI,GAAG;AACf,WAAO,uBAAoB;AAAA,EAC/B;AACJ;","names":["import_cheerio","Status","cheerio","turnDownService","TurndownPluginGfm","axios"]}
package/dist/index.d.ts CHANGED
@@ -1,3 +1,4 @@
1
+ import { AxiosRequestConfig } from 'axios';
1
2
  import turnDownService from 'turndown';
2
3
 
3
4
  interface TurnDownResult {
@@ -39,6 +40,12 @@ declare function parseHTML(htmlRaw: string, meta: {
39
40
  content: string;
40
41
  };
41
42
  }>;
42
- declare function transformHtml2Markdown(url: string): Promise<TurnDownResult>;
43
+ /**
44
+ * 支持添加代理服务器
45
+ */
46
+ interface TransformHtml2MarkdownOptions {
47
+ axiosConfig?: AxiosRequestConfig;
48
+ }
49
+ declare function transformHtml2Markdown(url: string, options?: TransformHtml2MarkdownOptions): Promise<TurnDownResult>;
43
50
 
44
51
  export { Status, TurnDownResult, transformHtml2Markdown as default, getTurnDownService, parseHTML };
package/dist/index.js CHANGED
@@ -8,11 +8,11 @@ var errObj = {
8
8
  };
9
9
 
10
10
  // src/type.ts
11
- var Status;
12
- (function(Status2) {
11
+ var Status = /* @__PURE__ */ ((Status2) => {
13
12
  Status2[Status2["Success"] = 200] = "Success";
14
13
  Status2[Status2["Fail"] = 400] = "Fail";
15
- })(Status || (Status = {}));
14
+ return Status2;
15
+ })(Status || {});
16
16
 
17
17
  // src/turndownCode.ts
18
18
  import turnDownService from "turndown";
@@ -93,7 +93,9 @@ function getTurnDownService(params) {
93
93
  },
94
94
  replacement(content, _node) {
95
95
  const node = _node;
96
- const cover = decodeURIComponent(node.getAttribute("data-cover") || "");
96
+ const cover = decodeURIComponent(
97
+ node.getAttribute("data-cover") || ""
98
+ );
97
99
  const u = new URL(params.url);
98
100
  u.hash = `js_mp_video_container_${videoCounter++}`;
99
101
  return cover ? `
@@ -128,10 +130,14 @@ async function parseHTML(htmlRaw, meta) {
128
130
  const $ = load(htmlRaw);
129
131
  let title = $("#activity-name").text();
130
132
  title = title.trim() || "";
131
- const author = Array.from(new Set([
132
- (_a = $('meta[name="author"]')) == null ? void 0 : _a.attr("content"),
133
- ...$("#js_name").text().split("\n")
134
- ].map((item) => item ? item.trim() : "").filter(Boolean))).join("\n");
133
+ const author = Array.from(
134
+ new Set(
135
+ [
136
+ (_a = $('meta[name="author"]')) == null ? void 0 : _a.attr("content"),
137
+ ...$("#js_name").text().split("\n")
138
+ ].map((item) => item ? item.trim() : "").filter(Boolean)
139
+ )
140
+ ).join("\n");
135
141
  const htmlEl = $("#js_content");
136
142
  const html = htmlEl.html();
137
143
  if (html && html.length > 0) {
@@ -143,7 +149,7 @@ async function parseHTML(htmlRaw, meta) {
143
149
  ` + res;
144
150
  return {
145
151
  success: true,
146
- code: Status.Success,
152
+ code: 200 /* Success */,
147
153
  data: {
148
154
  title,
149
155
  author,
@@ -151,9 +157,11 @@ async function parseHTML(htmlRaw, meta) {
151
157
  }
152
158
  };
153
159
  }
154
- return getError(Status.Fail);
160
+ return getError(400 /* Fail */);
155
161
  }
156
- async function transformHtml2Markdown(url) {
162
+ async function transformHtml2Markdown(url, options = {}) {
163
+ const { axiosConfig = {} } = options;
164
+ const { headers = {}, ...restConfig } = axiosConfig;
157
165
  const u = new URL(url);
158
166
  u.searchParams.delete("poc_token");
159
167
  try {
@@ -163,13 +171,15 @@ async function transformHtml2Markdown(url) {
163
171
  headers: {
164
172
  DNT: "1",
165
173
  "Upgrade-Insecure-Requests": "1",
166
- "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36"
167
- }
174
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
175
+ ...headers
176
+ },
177
+ ...restConfig
168
178
  });
169
179
  return parseHTML(res.data, { url: u.href });
170
180
  } catch (err) {
171
181
  console.log(err);
172
- return getError(Status.Fail);
182
+ return getError(400 /* Fail */);
173
183
  }
174
184
  }
175
185
  export {
@@ -178,4 +188,4 @@ export {
178
188
  getTurnDownService,
179
189
  parseHTML
180
190
  };
181
- //# sourceMappingURL=index.js.map
191
+ //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1,7 +1 @@
1
- {
2
- "version": 3,
3
- "sources": ["../src/index.ts", "../src/error.ts", "../src/type.ts", "../src/turndownCode.ts", "../src/formatHtml.ts"],
4
- "sourcesContent": ["import axios from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport type { TurnDownResult } from './type'\nimport { Status } from './type'\nimport { getTurnDownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport async function parseHTML(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html && html.length > 0) {\n let res = getTurnDownService(meta).turndown(html)\n\n res = `## ${title} \\n \\n` + `## \u4F5C\u8005 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n}\n\nexport default async function transformHtml2Markdown(\n url: string\n): Promise<TurnDownResult> {\n const u = new URL(url)\n // \u79FB\u9664\u8BE5\u53C2\u6570\n // \u907F\u514D\u51FA\u73B0 302 \u8DF3\u8F6C\n u.searchParams.delete('poc_token')\n\n try {\n const res = await axios.get(u.href, {\n timeout: 30000,\n maxRedirects: 5,\n headers: {\n DNT: '1',\n 'Upgrade-Insecure-Requests': '1',\n 'User-Agent':\n 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',\n },\n })\n\n return parseHTML(res.data, { url: u.href })\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\nexport { getTurnDownService } from './turndownCode'\n", "export const errObj: {\n [key: number]: string\n} = {\n '400': '\u5185\u5BB9\u89E3\u6790\u5931\u8D25',\n}\n", "export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n", "/**\n * html \u8F6C\u6362 markdown \u683C\u5F0F\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // \u81EA\u5B9A\u4E49\u914D\u7F6E\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // \u5FAE\u4FE1\u6587\u7AE0\u83B7\u53D6\u5230\u7684 content\uFF0C \u4F1A\u51FA\u73B0\u9996\u5C3E\u90FD\u6709 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n", "import cheerio from 'cheerio'\n\n/**\n * \u5FAE\u4FE1\u4E0D\u540C\u4EE3\u7801\u98CE\u683C\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown \u4E0D\u89E3\u6790 code \u4E0B\u7684 br \u6807\u7B7E\uFF0C\u9700\u8981\u4F7F\u7528\u6B63\u5219\u66FF\u6362 br \u6807\u7B7E\u4E3A \\n \u624D\u53EF\u4EE5\u7EE7\u7EED\u89E3\u6790\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '\u2018')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * \u89E3\u51B3\u5982\u4E0B\u683C\u5F0F\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img \u53EF\u80FD\u6CA1\u6709\u56FE\u7247\u8BF4\u660E\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],
5
- "mappings": ";AAAA;AACA;;;ACDO,IAAM,SAET;AAAA,EACA,OAAO;AAAA;;;ACQJ,IAAW;AAAX,UAAW,SAAX;AACH,+BAAU,OAAV;AACA,4BAAO,OAAP;AAAA,GAFc;;;ACRlB;AACA;;;ACJA;AAWO,oBAAoB,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,WAAW;AAE/B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,aAAa;AAEjC,SAAO,KAAK,QAAQ,cAAc;AAElC,QAAM,IAAI,QAAQ,KAAK;AAEvB,SAAO,EAAE;AAAA;AASN,yBAAyB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM;AAEhC,QAAM,UAAU,WAAW,MAAM;AAEjC,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,SAAS;AACvB,aAAS,OAAO;AAAA;AAGpB,MAAI,MAAM,QAAQ,UAAU;AACxB,WAAO,QAAQ;AAAA;AAInB,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA;AAG9B;AAAA;;;AD1DJ,4BAA4B,QAAgB;AACxC,QAAM,kBAAkB,IAAI,gBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA;AAGR,oBAAkB,IAAI;AAEtB,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK;AAAA;AAGnC,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA;AAAA,KAG9B,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,eAAe;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA;AAAA,KAG7C,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,kBAAkB,YAC/B,KAAK,UAAU,SAAS;AAAA;AAAA,IAGhC,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ,mBACV,KAAK,aAAa,iBAAiB;AAGvC,YAAM,IAAI,IAAI,IAAI,OAAO;AACzB,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA;AAAA,KAG9D,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,KAEtB,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK;AACjC,aAAO,OAAO;AAAA;AAAA;AAI1B,SAAO;AAAA;;;AH1EX,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA;AAAA;AAMpB,yBAAgC,SAAiB,MAAuB;AAjBxE;AAkBI,QAAM,IAAI,KAAK;AAEf,MAAI,QAAQ,EAAE,kBAAkB;AAEhC,UAAQ,MAAM,UAAU;AACxB,QAAM,SAAS,MAAM,KACjB,IAAI,IACA;AAAA,IACI,QAAE,2BAAF,mBAA0B,KAAK;AAAA,IAC/B,GAAG,EAAE,YAAY,OAAO,MAAM;AAAA,IAE7B,IAAI,CAAC,SAAU,OAAO,KAAK,SAAS,IACpC,OAAO,WAElB,KAAK;AAEP,QAAM,SAAS,EAAE;AACjB,QAAM,OAAO,OAAO;AAEpB,MAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,QAAI,MAAM,mBAAmB,MAAM,SAAS;AAE5C,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT,MAAM,OAAO;AAAA,MACb,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA;AAAA;AAAA;AAKrB,SAAO,SAAS,OAAO;AAAA;AAG3B,sCACI,KACuB;AACvB,QAAM,IAAI,IAAI,IAAI;AAGlB,IAAE,aAAa,OAAO;AAEtB,MAAI;AACA,UAAM,MAAM,MAAM,MAAM,IAAI,EAAE,MAAM;AAAA,MAChC,SAAS;AAAA,MACT,cAAc;AAAA,MACd,SAAS;AAAA,QACL,KAAK;AAAA,QACL,6BAA6B;AAAA,QAC7B,cACI;AAAA;AAAA;AAIZ,WAAO,UAAU,IAAI,MAAM,EAAE,KAAK,EAAE;AAAA,WAC/B,KAAP;AACE,YAAQ,IAAI;AACZ,WAAO,SAAS,OAAO;AAAA;AAAA;",
6
- "names": []
7
- }
1
+ {"version":3,"sources":["../src/index.ts","../src/error.ts","../src/type.ts","../src/turndownCode.ts","../src/formatHtml.ts"],"sourcesContent":["import axios, { AxiosRequestConfig } from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport type { TurnDownResult } from './type'\nimport { Status } from './type'\nimport { getTurnDownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport async function parseHTML(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html && html.length > 0) {\n let res = getTurnDownService(meta).turndown(html)\n\n res = `## ${title} \\n \\n` + `## 作者 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n}\n\n/**\n * 支持添加代理服务器\n */\ninterface TransformHtml2MarkdownOptions {\n axiosConfig?: AxiosRequestConfig\n}\n\nexport default async function transformHtml2Markdown(\n url: string,\n options: TransformHtml2MarkdownOptions = {}\n): Promise<TurnDownResult> {\n const { axiosConfig = {} } = options\n const { headers = {}, ...restConfig } = axiosConfig\n\n const u = new URL(url)\n // 移除该参数\n // 避免出现 302 跳转\n u.searchParams.delete('poc_token')\n\n try {\n const res = await axios.get(u.href, {\n timeout: 30000,\n maxRedirects: 5,\n headers: {\n DNT: '1',\n 'Upgrade-Insecure-Requests': '1',\n 'User-Agent':\n 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',\n ...headers,\n },\n ...restConfig,\n })\n\n return parseHTML(res.data, { url: u.href })\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\nexport { getTurnDownService } from './turndownCode'\n","export const errObj: {\n [key: number]: string\n} = {\n '400': '内容解析失败',\n}\n","export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n","/**\n * html 转换 markdown 格式\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // 自定义配置\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // 微信文章获取到的 content, 会出现首尾都有 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n","import cheerio from 'cheerio'\n\n/**\n * 微信不同代码风格\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown 不解析 code 下的 br 标签,需要使用正则替换 br 标签为 \\n 才可以继续解析\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '‘')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * 解决如下格式\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img 可能没有图片说明\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],"mappings":";AAAA,OAAO,WAAmC;AAC1C,SAAS,YAAY;;;ACDd,IAAM,SAET;AAAA,EACA,OAAO;AACX;;;ACOO,IAAW,SAAX,kBAAWA,YAAX;AACH,EAAAA,gBAAA,aAAU,OAAV;AACA,EAAAA,gBAAA,UAAO,OAAP;AAFc,SAAAA;AAAA,GAAA;;;ACRlB,OAAO,qBAAqB;AAC5B,OAAO,uBAAuB;;;ACJ9B,OAAO,aAAa;AAWb,SAAS,WAAW,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU,IAAI;AAElC,SAAO,KAAK,QAAQ,YAAY,GAAG;AAEnC,SAAO,KAAK,QAAQ,UAAU,GAAG;AAEjC,SAAO,KAAK,QAAQ,UAAU,GAAG;AAEjC,SAAO,KAAK,QAAQ,WAAW,GAAG;AAElC,SAAO,KAAK,QAAQ,YAAY,GAAG;AAEnC,SAAO,KAAK,QAAQ,YAAY,QAAG;AAEnC,SAAO,KAAK,QAAQ,aAAa,GAAG;AAEpC,SAAO,KAAK,QAAQ,cAAc,GAAG;AAErC,QAAM,IAAI,QAAQ,KAAK,IAAI;AAE3B,SAAO,EAAE,KAAK;AAClB;AAQO,SAAS,gBAAgB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM,QAAQ;AAExC,QAAM,UAAU,WAAW,MAAM,SAAS;AAE1C,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,MAAM,GAAG;AACvB,aAAS,OAAO;AAAA,EACpB;AAEA,MAAI,MAAM,QAAQ,OAAO,GAAG;AACxB,WAAO,QAAQ;AAAA,EACnB;AAGA,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA,EAC9B;AAEA;AACJ;;;AD3DA,SAAS,mBAAmB,QAAgB;AACxC,QAAM,kBAAkB,IAAI,gBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA,EACR,CAAC;AAED,oBAAkB,IAAI,eAAe;AAErC,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,KAAK;AAAA,IACd,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK,SAAS;AAAA,MAC5C;AAEA,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA,IAC3B;AAAA,EACJ,CAAC,EACA,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,KAAK;AAAA,IACd,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,UAAU,KAAK;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA,IAC1C;AAAA,EACJ,CAAC,EACA,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,YAAY,MAAM,YAC/B,KAAK,UAAU,SAAS,cAAc;AAAA,IAE9C;AAAA,IACA,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ;AAAA,QACV,KAAK,aAAa,YAAY,KAAK;AAAA,MACvC;AAEA,YAAM,IAAI,IAAI,IAAI,OAAO,GAAG;AAC5B,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA,IAC3D;AAAA,EACJ,CAAC,EACA,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,EACvB,CAAC,EACA,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,QAAQ;AAAA,IACjB,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK,SAAS;AAC1C,aAAO,OAAO;AAAA,IAClB;AAAA,EACJ,CAAC;AAEL,SAAO;AACX;;;AH3EA,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA,EAChB;AACJ;AAIA,eAAsB,UAAU,SAAiB,MAAuB;AAjBxE;AAkBI,QAAM,IAAI,KAAK,OAAO;AAEtB,MAAI,QAAQ,EAAE,gBAAgB,EAAE,KAAK;AAErC,UAAQ,MAAM,KAAK,KAAK;AACxB,QAAM,SAAS,MAAM;AAAA,IACjB,IAAI;AAAA,MACA;AAAA,SACI,OAAE,qBAAqB,MAAvB,mBAA0B,KAAK;AAAA,QAC/B,GAAG,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,IAAI;AAAA,MACtC,EACK,IAAI,CAAC,SAAU,OAAO,KAAK,KAAK,IAAI,EAAG,EACvC,OAAO,OAAO;AAAA,IACvB;AAAA,EACJ,EAAE,KAAK,IAAI;AAEX,QAAM,SAAS,EAAE,aAAa;AAC9B,QAAM,OAAO,OAAO,KAAK;AAEzB,MAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,QAAI,MAAM,mBAAmB,IAAI,EAAE,SAAS,IAAI;AAEhD,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT;AAAA,MACA,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACb;AAAA,IACJ;AAAA,EACJ;AAEA,SAAO,uBAAoB;AAC/B;AASA,eAAO,uBACH,KACA,UAAyC,CAAC,GACnB;AACvB,QAAM,EAAE,cAAc,CAAC,EAAE,IAAI;AAC7B,QAAM,EAAE,UAAU,CAAC,MAAM,WAAW,IAAI;AAExC,QAAM,IAAI,IAAI,IAAI,GAAG;AAGrB,IAAE,aAAa,OAAO,WAAW;AAEjC,MAAI;AACA,UAAM,MAAM,MAAM,MAAM,IAAI,EAAE,MAAM;AAAA,MAChC,SAAS;AAAA,MACT,cAAc;AAAA,MACd,SAAS;AAAA,QACL,KAAK;AAAA,QACL,6BAA6B;AAAA,QAC7B,cACI;AAAA,QACJ,GAAG;AAAA,MACP;AAAA,MACA,GAAG;AAAA,IACP,CAAC;AAED,WAAO,UAAU,IAAI,MAAM,EAAE,KAAK,EAAE,KAAK,CAAC;AAAA,EAC9C,SAAS,KAAP;AACE,YAAQ,IAAI,GAAG;AACf,WAAO,uBAAoB;AAAA,EAC/B;AACJ;","names":["Status"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aiello/wechat-to-markdown",
3
- "version": "1.2.10",
3
+ "version": "1.2.11",
4
4
  "description": "解析微信文章 URL 为 markdown",
5
5
  "author": "Aiello Chan<aiello.chan@gmail.com>",
6
6
  "keywords": [